Street Flooding Complaints (SFC)#

Import Libraries#

Built-in Libraries#

import json
import os

External Libraries#

import pyproj
import geopandas as gpd
import pandas as pd
# import geojson as gj

311 Service Requests from 2010 to Present#

About#

Key

Value

URL

https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9

Description

All 311 Service Requests from 2010 to present.

Updated

2023-02-13

Views

440K+

Data Provided by

311, DoITT

Category

Social Services

API Docs

https://dev.socrata.com/foundry/data.cityofnewyork.us/erm2-nwe9

API Endpoints
(sample size: 10)

JSON
GeoJSON
CSV

complaint_type

Sewer

descriptor

Street Flooding (SJ)

Define Variables#

Default limit = 1000

Ref: Paging through Data

%%script echo skip
# Endpoints for the 311 dataset (erm2-nwe9) filtered to descriptor "Street Flooding (SJ)",
# one per response format. No $limit is given, so the default limit of 1000 rows applies.
NYC_OPEN_DATA_311_API_JSON = 'https://data.cityofnewyork.us/resource/erm2-nwe9.json?descriptor=Street%20Flooding%20(SJ)'
NYC_OPEN_DATA_311_API_GEOJSON = 'https://data.cityofnewyork.us/resource/erm2-nwe9.geojson?descriptor=Street%20Flooding%20(SJ)'
NYC_OPEN_DATA_311_API_CSV = 'https://data.cityofnewyork.us/resource/erm2-nwe9.csv?descriptor=Street%20Flooding%20(SJ)'
skip

Download 311 Service Complaints for Street Flooding (SJ)#

Define prefix for output variable#

%%script echo skip
# Path prefix for the single-file downloads; it ends with '.' so the save cells
# only have to append the extension ('json', 'geojson', 'csv').
output_prefix = 'data/street_flood-complaints.'
skip

Save .json data locally#

%%script echo skip
# Read the JSON endpoint into a DataFrame and write it to output_prefix + 'json'.
street_flooding_jdf = pd.read_json(NYC_OPEN_DATA_311_API_JSON)
street_flooding_jdf.to_json(output_prefix + 'json')
skip

Save .geojson data locally#

%%script echo skip
# Read the GeoJSON endpoint into a GeoDataFrame and write it to output_prefix + 'geojson'.
street_flooding_gdf = gpd.read_file(NYC_OPEN_DATA_311_API_GEOJSON, driver='GeoJSON')
street_flooding_gdf.to_file(output_prefix + 'geojson')
skip
def get_street_flooding_data(file_type: str = 'geojson') -> None:
    """Download all Street Flooding (SJ) complaints and save them page by page.

    Requests the 311 endpoint in pages of 10,000 rows and writes every
    non-empty page to its own file named by ``get_output_file_name``
    (``data/street_flood-complaints_<first>_<last>.<file_type>``). Paging
    stops at the first page that comes back empty.

    Args:
        file_type (str, optional): Extension used for the saved file names.
            The request itself always goes to the URL built by
            ``get_api_endpoint``. Defaults to 'geojson'.
    """
    limit = 10000
    output_prefix = 'data/street_flood-complaints'

    # Make sure the target directory exists before the first page is written.
    os.makedirs(os.path.dirname(output_prefix), exist_ok=True)

    current_file = 0
    while True:
        street_flooding_df = gpd.read_file(get_api_endpoint(limit, current_file), driver='GeoJSON')
        # An empty page means the offset has moved past the last record.
        if len(street_flooding_df) == 0:
            break
        file_name_output = get_output_file_name(output_prefix, limit, current_file, file_type)
        street_flooding_df.to_file(file_name_output)
        print(f'Save file {current_file + 1}: {file_name_output}')
        current_file += 1
        
def get_api_endpoint(limit: int, current_file: int) -> str:
    """Build the GeoJSON request URL for one page of Street Flooding (SJ) complaints.

    Args:
        limit (int): Number of rows per page, sent as ``$limit``.
        current_file (int): Zero-based page number; ``$offset`` is
            ``limit * current_file``.

    Returns:
        str: The endpoint URL with the descriptor filter, ``$limit``,
            ``$offset`` and ``$order=unique_key`` query parameters.
    """
    url_template = (
        'https://data.cityofnewyork.us/resource/erm2-nwe9.geojson'
        '?descriptor=Street%20Flooding%20(SJ)'
        '&$limit={limit}&$offset={offset}&$order=unique_key'
    )
    return url_template.format(limit=limit, offset=limit * current_file)

def get_output_file_name(output_prefix: str, limit: int, current_file: int, file_type: str):
    """Build the file name for one downloaded page.

    The name encodes the 1-based row range the page covers, each bound
    zero-padded to at least six digits.

    Args:
        output_prefix (str): Path and base name placed before the row range.
        limit (int): Number of rows per page.
        current_file (int): Zero-based page number.
        file_type (str): File extension, without the leading dot.

    Returns:
        str: ``<output_prefix>_<first row>_<last row>.<file_type>``.
    """
    first_row = limit * current_file + 1
    last_row = limit * (current_file + 1)
    return f'{output_prefix}_{first_row:06d}_{last_row:06d}.{file_type}'
%%script echo skip
# Example call: build the output file name for the first page (current_file = 0)
# using the same prefix and page size as get_street_flooding_data.
output_prefix = 'data/street_flood-complaints'
file_size = 10000
limit = file_size
current_file = 0
file_type = 'geojson'

get_output_file_name(output_prefix, limit, current_file, file_type)
skip
%%script echo "skip: refactor to check if already downloaded"
# Runs the full paged download. The cell magic echoes the message instead of
# executing this call, pending a check for pages that were already downloaded.
get_street_flooding_data(file_type = 'geojson')
skip: refactor to check if already downloaded
# Collect every saved .geojson page under data/. os.listdir() returns names in
# arbitrary order, so sort them to get the same row order on every run.
geojson_file_list = sorted(
    'data/' + geojson_file
    for geojson_file in os.listdir('data/')
    if geojson_file.endswith('.geojson')
)

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not geojson_file_list:
    raise FileNotFoundError("No .geojson files found in 'data/'; run get_street_flooding_data() first.")

# Load each page into its own GeoDataFrame.
geojson_df_list = [
    gpd.read_file(geojson_file, driver='GeoJSON')
    for geojson_file in geojson_file_list
]

# Stack the pages into one frame. Each page keeps its own row labels, so the
# combined index contains repeats.
street_flooding_gdf = pd.concat(geojson_df_list)

Save .csv data locally#

%%script echo skip
# Read the CSV endpoint into a DataFrame and write it to output_prefix + 'csv'.
# NOTE(review): output_prefix is assigned both with and without a trailing '.' in
# earlier cells; confirm which value is live before enabling this cell.
street_flooding_cdf = pd.read_csv(NYC_OPEN_DATA_311_API_CSV)
street_flooding_cdf.to_csv(output_prefix + 'csv')
skip

View Street Flooding Metadata#

# Print each column's name, non-null count and dtype for the combined frame.
street_flooding_gdf.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 35006 entries, 0 to 5005
Data columns (total 45 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   location_state                  33999 non-null  object        
 1   facility_type                   23296 non-null  object        
 2   intersection_street_2           11289 non-null  object        
 3   city                            34141 non-null  object        
 4   location_zip                    33999 non-null  object        
 5   park_borough                    35002 non-null  object        
 6   latitude                        33999 non-null  object        
 7   road_ramp                       0 non-null      float64       
 8   created_date                    35006 non-null  datetime64[ns]
 9   agency                          35006 non-null  object        
 10  park_facility_name              35006 non-null  object        
 11  location_address                33999 non-null  object        
 12  agency_name                     35006 non-null  object        
 13  descriptor                      35006 non-null  object        
 14  bbl                             21669 non-null  object        
 15  location_city                   33999 non-null  object        
 16  open_data_channel_type          35006 non-null  object        
 17  cross_street_2                  30035 non-null  object        
 18  bridge_highway_direction        0 non-null      float64       
 19  longitude                       33999 non-null  object        
 20  bridge_highway_segment          0 non-null      float64       
 21  street_name                     23769 non-null  object        
 22  incident_address                23769 non-null  object        
 23  address_type                    35000 non-null  object        
 24  incident_zip                    34137 non-null  object        
 25  unique_key                      35006 non-null  object        
 26  complaint_type                  35006 non-null  object        
 27  y_coordinate_state_plane        33999 non-null  object        
 28  status                          35006 non-null  object        
 29  bridge_highway_name             0 non-null      float64       
 30  location_type                   0 non-null      float64       
 31  due_date                        1 non-null      datetime64[ns]
 32  taxi_company_borough            0 non-null      float64       
 33  taxi_pick_up_location           0 non-null      float64       
 34  x_coordinate_state_plane        33999 non-null  object        
 35  resolution_description          34989 non-null  object        
 36  community_board                 35002 non-null  object        
 37  resolution_action_updated_date  34997 non-null  datetime64[ns]
 38  intersection_street_1           11289 non-null  object        
 39  closed_date                     34989 non-null  datetime64[ns]
 40  vehicle_type                    0 non-null      float64       
 41  cross_street_1                  30045 non-null  object        
 42  borough                         35002 non-null  object        
 43  landmark                        0 non-null      float64       
 44  geometry                        33999 non-null  geometry      
dtypes: datetime64[ns](4), float64(9), geometry(1), object(31)
memory usage: 12.3+ MB

Convert datetime64 data type to string#

# Replace the datetime64 values in created_date, resolution_action_updated_date
# and closed_date with 'YYYY-MM-DD HH:MM:SS' strings.
for date_column in ('created_date', 'resolution_action_updated_date', 'closed_date'):
    street_flooding_gdf[date_column] = street_flooding_gdf[date_column].dt.strftime('%Y-%m-%d %H:%M:%S')

Set unique_key as Index#

# Use the unique_key column as the row index; inplace=True modifies the existing frame.
street_flooding_gdf.set_index('unique_key', inplace=True)

Remove Rows With Missing geometry#

# Drop, in place, every row whose geometry is null.
street_flooding_gdf.dropna(subset = ['geometry'], inplace = True)

Preview Street Flooding Data#

# Show the first 10 rows, restricted to four columns.
street_flooding_gdf[['created_date', 'borough', 'bbl', 'geometry']].head(10)
created_date borough bbl geometry
unique_key
15639934 2010-01-02 08:26:00 BROOKLYN 3089000064 POINT (-73.92178 40.58778)
15640572 2010-01-02 12:00:00 STATEN ISLAND NaN POINT (-74.14329 40.63866)
15640664 2010-01-02 17:45:00 QUEENS 4120050012 POINT (-73.79530 40.68140)
15655327 2010-01-04 16:47:00 QUEENS 4106210008 POINT (-73.73843 40.72006)
15668560 2010-01-05 10:37:00 BROOKLYN 3086550021 POINT (-73.90969 40.61250)
15674300 2010-01-06 19:26:00 BROOKLYN 3029270015 POINT (-73.93297 40.71584)
15674896 2010-01-06 08:24:00 QUEENS 4119960122 POINT (-73.80255 40.67925)
15674924 2010-01-06 09:17:00 STATEN ISLAND 5040740044 POINT (-74.10646 40.55866)
15675505 2010-01-06 06:00:00 QUEENS 4030030044 POINT (-73.87694 40.71804)
15683503 2010-01-07 10:16:00 STATEN ISLAND 5014850078 POINT (-74.14943 40.61979)

View on Map#

# Columns kept for the map layer.
popup_columns = [
    'geometry',
    'created_date',
    'incident_address',
    'city',
    'incident_zip',
    'borough',
    'bbl',
    'status',
]

# Re-assign the frame's active geometry to the 'geometry' column.
street_flooding_gdf['geometry'] = street_flooding_gdf.geometry

# Render an interactive map of the selected columns, using 'borough' as the plotted column.
street_flooding_gdf[popup_columns].explore(column='borough')
Make this Notebook Trusted to load map: File -> Trust Notebook

References#

GeoPandas#

Reading and Writing Files | GeoPandas Documentation

pyproj#

On fresh Conda installation of PyProj: pyproj unable to set database path. _pyproj_global_context_initialize()

Fix#

Un-install pyproj

conda remove --force pyproj

Re-install pyproj via pip instead of conda

pip install pyproj